load("XSTSF_production.RData")
source('functions.R')
f0_all_ct <- f0_all_pre %>% filter(focus_condition == 'ct' ) %>%
group_by(speaker) %>%
mutate(norm_f0 = scale(log(f0))) %>%
ungroup()
f0_mono <- f0_all_ct %>% filter(is.na(diortri) == TRUE)
distri_prop2(f0_mono, hist_tone1, sync_tone1)
f0_mono %>% filter(hist_tone1 == 'yangshang' & sync_tone1 == 'LH') %>% select(token, speaker) %>% distinct()
## # A tibble: 5 × 2
## token speaker
## <chr> <fct>
## 1 买 S1
## 2 买 S2
## 3 买 S3
## 4 买 S6
## 5 买 S8
f0_mono %>% filter(hist_tone1 == 'yangping' & sync_tone1 == 'LH') %>% select(token, speaker) %>% distinct()
## # A tibble: 2 × 2
## token speaker
## <chr> <fct>
## 1 梅 S3
## 2 莓 S6
f0_mono %>% filter(token == '买') %>% select(token, speaker, sync_tone1) %>% distinct()
## # A tibble: 8 × 3
## token speaker sync_tone1
## <chr> <fct> <chr>
## 1 买 S1 LH
## 2 买 S2 LH
## 3 买 S3 LH
## 4 买 S4 RF
## 5 买 S5 RF
## 6 买 S6 LH
## 7 买 S7 RF
## 8 买 S8 LH
A function to plot f0 contours
p_cluster <- function(df_cluster, x, y = NULL, avg_line_width = 2.5){
p_cluster <- df_cluster %>%
ggplot(aes(x = time, y = norm_f0, group = ind_no, color = {{x}},
text = paste('speaker: ', speaker,
'\ntoken_no: ', citation_no,
'\ntoken: ', token,
'\ntime: ', time,
'\nnorm_f0: ', norm_f0))) +
geom_line(alpha = 0.2) +
scale_color_ptol() +
stat_summary(fun = mean, geom = "line", lwd = avg_line_width, aes(group = {{x}}), lty = 1) +
xlab("Normalised time") +
ylab("z-scores of log-f0") +
labs(color = "tone") +
scale_color_manual(values = c("#4477AA", "#CC6677", "#DDCC77", "#117733"))+
theme_minimal() +
theme(legend.position = "top",
text = element_text(family = 'Times New Roman', size = 20),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 20)))
if (!is.null(y)) {
p_cluster <- p_cluster + facet_wrap(as.formula(paste("~", y)), ncol = 4, labeller = label_both)
}
return(p_cluster)
}
Auditory cluster
p_cluster(f0_mono, citation_tone, avg_line_width = 4)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
k-means clustering
functions
# compare k-means and human inspection cluster results
heatmap_data <- function(df, x){
heatmap_df <- df %>% filter(time == 1) %>%
group_by(citation_tone, {{x}}) %>%
count() %>%
ungroup() %>%
group_by(citation_tone) %>%
mutate(count = sum(n),
prop = n/count) %>%
ungroup()
return(heatmap_df)
}
compare_cluster <- function(df, x){
heatmap_plot<-ggplot(df,aes_string(x=x,y='citation_tone',fill='prop'))+
# scale_fill_continuous(breaks=c(0,0.5,1))+
geom_tile()+xlab("cluster")+labs(fill="Frequency")+
geom_text(aes(label = sprintf("%.2f", prop)),size=5, color = 'white')+
scale_fill_viridis(direction = -1)+
#scale_color_viridis(direction = -1)+
#scale_fill_gradient(low = "yellow", high = "green", breaks = c(0, 0.5, 1))+
theme_minimal()+
theme(text = element_text(family = 'Times New Roman', size = 20),
axis.title.x = element_text(margin = margin(t = 15)),
#axis.text.x = element_text(color = c("#4477AA", "#CC6677", "#DDCC77", "#117733"), face = "bold"),
#axis.text.y = element_text(color = c("#4477AA", "#CC6677", "#DDCC77", "#117733"), face = "bold"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
xlab('k-means cluster')+
ylab('perceptual cluster')
return(heatmap_plot)
}
# data preparation
f0_mono_kmeans <- f0_mono %>%
select(-diortri, -syllable_no, -focus_no, -f0) %>%
spread(time, norm_f0)
# k-means clustering
cluster_model <- k_means_clustering(f0_mono_kmeans)
kml(cluster_model, nbClusters = 2:10)
## ~ Fast KmL ~
## ***************************************************************************************************S
## 100 ********************************************************************************S
plotAllCriterion(cluster_model)
kml::plot(cluster_model, 4, parTraj=parTRAJ(col="clusters"))
# get cluster results
f0_mono_kmeans <- f0_mono_kmeans %>%
mutate(cluster4 = getClusters(cluster_model, 4))
f0_mono_kmeans <- wide_to_long(f0_mono_kmeans)
k-means cluster visualisation
p_kmeans4 <- p_cluster(f0_mono_kmeans, cluster4);p_kmeans4
heatmap distribution
heatmap_df <- heatmap_data(f0_mono_kmeans, cluster4)
p_htmap4 <- compare_cluster(heatmap_df, 'cluster4'); p_htmap4
The plot below shows the numbers of tokens collected for each tone.
f0_mono_count_all <- f0_mono %>%
group_by(citation_tone) %>%
count() %>%
mutate(n = n/10)
f0_mono_count_all
## # A tibble: 4 × 2
## # Groups: citation_tone [4]
## citation_tone n
## <chr> <dbl>
## 1 HH 182
## 2 HL 58
## 3 LH 39
## 4 RF 153
f0_mono %>%
group_by(citation_tone, speaker) %>%
count() %>%
mutate(n = n/10) %>%
ggplot(aes(x = citation_tone, y = n, fill = speaker))+
geom_bar(stat="identity", position=position_dodge())+
geom_text(aes(label=n), vjust=1.6, color="black",
position = position_dodge(0.9), size=3.5)+
scale_fill_brewer(palette="Paired")
by speaker
p_cluster(f0_mono, citation_tone, 'speaker', avg_line_width = 1.5)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
ggplotly(p_cluster(f0_mono, citation_tone, 'speaker', avg_line_width = 1.5),
tooltip = c('text', 'x'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
f0_mono %>% filter(speaker == "S7" & citation_tone == "HL") %>% select(ind_no) %>% distinct()
## # A tibble: 6 × 1
## ind_no
## <chr>
## 1 S7_12_ct
## 2 S7_18_ct
## 3 S7_23_ct
## 4 S7_25_ct
## 5 S7_37_ct
## 6 S7_48_ct